HW 5

Winter 2018, DSPA (HS650)

Name : Prabhjot Singh

SID: 82364840

UMich E-mail: prabhj@umich.edu

I certify that the following paper represents my own independent work and conforms with the guidelines of academic honesty described in the UMich student handbook.

Problem 5.1

Gnereate Random numbers between -10 and 10 to train.

rand_data = runif(1000, 0, 20)
#In order to take input from the user
#n = readline(prompt="Enter the power parameter : ")
#n = as.integer(n)
n = 2 # We take n=3 for the purpose of this homework.

pwr_df <- data.frame(rand_data, pwr_data = (rand_data) ^ (n)) 
plot(rand_data, pwr_df$pwr_data)

Train the neural net

library(neuralnet)
set.seed(1234)
net.pwr <- neuralnet(pwr_data ~ rand_data,  pwr_df, hidden=50, threshold=0.1, stepmax = 1e06)

report the NN

#print(net.pwr)

Generate testing data seq(from=10, to=20, step=0.1)

test_data <- seq(0, 30, 0.1)
test_data_pwr <- (test_data) ^ n

Try to predict the power values

Compute or predict for test data, test_data

pred_pwr <- compute(net.pwr, test_data)$net.result

compare real (test_data_pwr) and NN-predicted (pred_pwr) cube value of test_data

plot(pred_pwr, test_data_pwr, xlim=c(0, 900), ylim=c(0,900))
abline(0,1, col="red", lty=2)
legend("bottomright",  c("Pred vs. Actual Power", "Pred=Actual Line"), cex=0.8, lty=c(1,2), lwd=c(2,2),col=c("black","red"))

compare_df

compare_df <-data.frame(pred_pwr, test_data_pwr)

plot(test_data, test_data_pwr)
lines(test_data, pred_pwr, pch=22, col="red", lty=2)
legend("bottomright",  c("Actual Power","Predicted Power"), lty=c(1,2), lwd=c(2,2),col=c("black","red"))

#### We can obsever in the above plot that the actual power value is ver close to the predicted power, although upto some extent, after which it breaks. After trying it with diifrent number of hidden layers, this seems to the better solution.

Problem 5.2

Use the ALS dataset to study a rare but devastating progressive neurodegenerative disease, amyotrophic lateral sclerosis (ALS). Major clinically relevant questions include: What patient phenotypes can be automatically and reliably identified and used to predict the change of the ALSFRS slope over time?

#setwd("~/Downloads")
als_train = read.csv("ALS_TrainingData_2223.csv")
head(als_train)
##   ID Age_mean Albumin_max Albumin_median Albumin_min Albumin_range
## 1  1       65          57           40.5          38   0.066202091
## 2  2       48          45           41.0          39   0.010452962
## 3  3       38          50           47.0          45   0.008928571
## 4  4       63          47           44.0          41   0.012111135
## 5  5       63          47           45.5          42   0.008291874
## 6  6       36          51           47.0          46   0.009057971
##   ALSFRS_slope ALSFRS_Total_max ALSFRS_Total_median ALSFRS_Total_min
## 1 -0.965608466               30                28.0               22
## 2 -0.921717172               37                33.0               21
## 3 -0.914786967               24                14.0               10
## 4 -0.598360656               30                29.0               24
## 5 -0.444038929               32                27.5               20
## 6 -0.118352789               37                34.5               27
##   ALSFRS_Total_range ALT.SGPT._max ALT.SGPT._median ALT.SGPT._min
## 1        0.021164021            24             22.0            18
## 2        0.028725314            25             13.0             8
## 3        0.025000000            25             20.0            14
## 4        0.014962594            62             60.0            41
## 5        0.020373514            38             26.5            22
## 6        0.018115942            34             23.0            18
##   ALT.SGPT._range AST.SGOT._max AST.SGOT._median AST.SGOT._min
## 1     0.020905923            31             27.5            23
## 2     0.029616725            31             17.0            14
## 3     0.019642857            24             19.0            18
## 4     0.052369077            46             40.0            33
## 5     0.026533997            35             26.5            20
## 6     0.028985507            31             26.0            21
##   AST.SGOT._range Bicarbonate_max Bicarbonate_median Bicarbonate_min
## 1     0.027874564              30                 28              25
## 2     0.029616725              32                 28              25
## 3     0.010714286              35                 29              24
## 4     0.032418953              23                 20              20
## 5     0.024875622              32                 28              23
## 6     0.018115942              29                 26              22
##   Bicarbonate_range Blood.Urea.Nitrogen..BUN._max
## 1       0.017421603                        8.0322
## 2       0.012195122                        8.3973
## 3       0.019642857                        5.4765
## 4       0.007481297                        8.0322
## 5       0.014925373                        5.1114
## 6       0.012681159                        6.5718
##   Blood.Urea.Nitrogen..BUN._median Blood.Urea.Nitrogen..BUN._min
## 1                          7.11945                        6.5718
## 2                          4.74630                        4.0161
## 3                          4.38120                        3.6510
## 4                          8.03220                        6.5718
## 5                          4.19865                        3.6510
## 6                          5.11140                        4.0161
##   Blood.Urea.Nitrogen..BUN._range bp_diastolic_max bp_diastolic_median
## 1                     0.005088502               90                  83
## 2                     0.007632753               80                  78
## 3                     0.003259821               86                  76
## 4                     0.003641895               90                  80
## 5                     0.002421891              100                  80
## 6                     0.004629891               84                  80
##   bp_diastolic_min bp_diastolic_range bp_systolic_max bp_systolic_median
## 1               69        0.055555556             160              139.0
## 2               64        0.028725314             140              132.5
## 3               58        0.050000000             120              110.0
## 4               70        0.049875312             150              130.0
## 5               68        0.053067993             160              130.0
## 6               60        0.043478261             140              115.0
##   bp_systolic_min bp_systolic_range Calcium_max Calcium_median Calcium_min
## 1             129       0.082010582     2.49500       2.220550     2.22055
## 2             104       0.064631957     2.32035       2.170650     2.02095
## 3              90       0.053571429     2.47005       2.295400     2.19560
## 4             120       0.074812968     2.47005       2.345300     2.23000
## 5             104       0.092868988     2.42015       2.257975     2.17065
## 6             100       0.072463768     2.39520       2.270450     2.17065
##   Calcium_range Chloride_max Chloride_median Chloride_min Chloride_range
## 1   0.000956272          109             108          103    0.020905923
## 2   0.000521603          108             102          100    0.013937282
## 3   0.000490089          108             106          104    0.007142857
## 4   0.000473934          109             107          106    0.007481297
## 5   0.000413765          107             104          100    0.011608624
## 6   0.000406793          110             105          101    0.016304348
##   Creatinine_max Creatinine_median Creatinine_min Creatinine_range
## 1          79.56             79.56          70.72      0.030801394
## 2          61.88             53.04          44.20      0.030801394
## 3          88.40             79.56          70.72      0.031571429
## 4          70.72             61.88          53.04      0.044089776
## 5          61.88             48.62          26.52      0.058640133
## 6         106.08             88.40          70.72      0.064057971
##   Gender_mean Glucose_max Glucose_median Glucose_min Glucose_range
## 1           1      7.4370         4.4955      4.2180   0.011216028
## 2           1      6.7710         4.9950      4.0515   0.004737805
## 3           2      5.6610         5.1060      4.2180   0.002576786
## 4           2      5.1060         4.7730      4.6620   0.001107232
## 5           1      7.4925         5.7165      5.0505   0.004049751
## 6           2      5.5500         5.1060      4.4400   0.002010870
##   hands_max hands_median hands_min hands_range Hematocrit_max
## 1         8          7.5         6 0.005291005           44.6
## 2         8          6.0         6 0.003590664           41.9
## 3         4          1.0         0 0.007142857           49.1
## 4         6          5.5         4 0.004987531           46.3
## 5         8          6.5         3 0.008488964           44.0
## 6         8          7.0         5 0.005434783           46.8
##   Hematocrit_median Hematocrit_min Hematocrit_range Hemoglobin_max
## 1             43.15           40.7      0.013588850            156
## 2             39.60           37.7      0.007317073            138
## 3             46.20           44.0      0.009107143            161
## 4             43.00           41.7      0.011471322            154
## 5             42.85           39.5      0.007462687            152
## 6             43.50           41.9      0.008876812            157
##   Hemoglobin_median Hemoglobin_min Hemoglobin_range leg_max leg_median
## 1             146.0            143      0.045296167       8        6.5
## 2             132.0            128      0.017421603       8        7.5
## 3             154.0            151      0.017857143       4        3.0
## 4             145.0            144      0.024937656       4        3.5
## 5             146.5            138      0.023217247       2        2.0
## 6             146.0            142      0.027173913       8        8.0
##   leg_min   leg_range mouth_max mouth_median mouth_min mouth_range
## 1       4 0.010582011         5          3.5         0 0.013227513
## 2       3 0.008976661         9          8.0         4 0.008976661
## 3       2 0.003571429        10          7.0         4 0.010714286
## 4       2 0.004987531        12         12.0        12 0.000000000
## 5       0 0.003395586        12         12.0        12 0.000000000
## 6       4 0.007246377         9          8.0         7 0.003623188
##   onset_delta_mean onset_site_mean Platelets_max Platelets_median
## 1            -1023               1           172            169.0
## 2             -341               1           286            264.0
## 3            -1181               1           233            213.0
## 4             -365               2           275            233.0
## 5            -1768               2           313            283.5
## 6             -334               1           220            194.0
##   Platelets_min Potassium_max Potassium_median Potassium_min
## 1           152           4.5             4.25           4.0
## 2           230           5.0             4.30           3.9
## 3           167           4.1             4.00           3.9
## 4           204           4.3             4.20           4.0
## 5           268           4.6             3.75           3.5
## 6           178           4.5             4.30           4.2
##   Potassium_range pulse_max pulse_median pulse_min pulse_range
## 1     0.001742160        79           68        61 0.047619048
## 2     0.001916376        90           76        64 0.046678636
## 3     0.000357143        82           73        60 0.039285714
## 4     0.000748130        84           72        68 0.039900249
## 5     0.001824212       101           96        74 0.044776119
## 6     0.000543478        88           66        60 0.050724638
##   respiratory_max respiratory_median respiratory_min respiratory_range
## 1               4                  3               3       0.002645503
## 2               4                  4               3       0.001795332
## 3               4                  4               4       0.000000000
## 4               3                  3               3       0.000000000
## 5               4                  4               3       0.001697793
## 6               4                  4               3       0.001811594
##   Sodium_max Sodium_median Sodium_min Sodium_range SubjectID trunk_max
## 1        148         145.5        143  0.017421603       533         8
## 2        142         138.0        136  0.010452962       649         8
## 3        145         143.0        140  0.008928571      1234         5
## 4        143         139.0        138  0.012468828      2492         5
## 5        143         140.0        138  0.008291874      2956         6
## 6        145         141.0        137  0.014492754      3085         8
##   trunk_median trunk_min trunk_range Urine.Ph_max Urine.Ph_median
## 1            7         7 0.002645503            6               6
## 2            7         5 0.005385996            7               5
## 3            0         0 0.008928571            6               5
## 4            5         3 0.004987531            7               6
## 5            4         1 0.008488964            6               5
## 6            8         7 0.001811594            8               6
##   Urine.Ph_min
## 1            6
## 2            5
## 3            5
## 4            5
## 5            5
## 6            5
str(als_train)
## 'data.frame':    2223 obs. of  101 variables:
##  $ ID                              : int  1 2 3 4 5 6 7 8 9 11 ...
##  $ Age_mean                        : int  65 48 38 63 63 36 55 55 37 72 ...
##  $ Albumin_max                     : num  57 45 50 47 47 51 46 45 48 44 ...
##  $ Albumin_median                  : num  40.5 41 47 44 45.5 47 44 42 46 42 ...
##  $ Albumin_min                     : num  38 39 45 41 42 46 40 38 41 38 ...
##  $ Albumin_range                   : num  0.0662 0.01045 0.00893 0.01211 0.00829 ...
##  $ ALSFRS_slope                    : num  -0.966 -0.922 -0.915 -0.598 -0.444 ...
##  $ ALSFRS_Total_max                : int  30 37 24 30 32 37 34 30 35 28 ...
##  $ ALSFRS_Total_median             : num  28 33 14 29 27.5 34.5 24 27.5 28.5 25.5 ...
##  $ ALSFRS_Total_min                : int  22 21 10 24 20 27 10 20 24 23 ...
##  $ ALSFRS_Total_range              : num  0.0212 0.0287 0.025 0.015 0.0204 ...
##  $ ALT.SGPT._max                   : num  24 25 25 62 38 34 80 38 47 39 ...
##  $ ALT.SGPT._median                : num  22 13 20 60 26.5 23 46 27 42 20 ...
##  $ ALT.SGPT._min                   : num  18 8 14 41 22 18 19 15 25 11 ...
##  $ ALT.SGPT._range                 : num  0.0209 0.0296 0.0196 0.0524 0.0265 ...
##  $ AST.SGOT._max                   : int  31 31 24 46 35 31 57 26 43 49 ...
##  $ AST.SGOT._median                : num  27.5 17 19 40 26.5 26 37 25 30 24 ...
##  $ AST.SGOT._min                   : num  23 14 18 33 20 21 22 16 24 17 ...
##  $ AST.SGOT._range                 : num  0.0279 0.0296 0.0107 0.0324 0.0249 ...
##  $ Bicarbonate_max                 : num  30 32 35 23 32 29 32 29 36 32 ...
##  $ Bicarbonate_median              : num  28 28 29 20 28 26 27.5 28 29 29.5 ...
##  $ Bicarbonate_min                 : num  25 25 24 20 23 22 23 25 20 27 ...
##  $ Bicarbonate_range               : num  0.01742 0.0122 0.01964 0.00748 0.01493 ...
##  $ Blood.Urea.Nitrogen..BUN._max   : num  8.03 8.4 5.48 8.03 5.11 ...
##  $ Blood.Urea.Nitrogen..BUN._median: num  7.12 4.75 4.38 8.03 4.2 ...
##  $ Blood.Urea.Nitrogen..BUN._min   : num  6.57 4.02 3.65 6.57 3.65 ...
##  $ Blood.Urea.Nitrogen..BUN._range : num  0.00509 0.00763 0.00326 0.00364 0.00242 ...
##  $ bp_diastolic_max                : int  90 80 86 90 100 84 98 80 90 90 ...
##  $ bp_diastolic_median             : num  83 78 76 80 80 80 86 74 80 80 ...
##  $ bp_diastolic_min                : int  69 64 58 70 68 60 80 64 80 70 ...
##  $ bp_diastolic_range              : num  0.0556 0.0287 0.05 0.0499 0.0531 ...
##  $ bp_systolic_max                 : int  160 140 120 150 160 140 134 134 135 140 ...
##  $ bp_systolic_median              : num  139 132 110 130 130 ...
##  $ bp_systolic_min                 : int  129 104 90 120 104 100 110 104 115 120 ...
##  $ bp_systolic_range               : num  0.082 0.0646 0.0536 0.0748 0.0929 ...
##  $ Calcium_max                     : num  2.5 2.32 2.47 2.47 2.42 ...
##  $ Calcium_median                  : num  2.22 2.17 2.3 2.35 2.26 ...
##  $ Calcium_min                     : num  2.22 2.02 2.2 2.23 2.17 ...
##  $ Calcium_range                   : num  0.000956 0.000522 0.00049 0.000474 0.000414 ...
##  $ Chloride_max                    : num  109 108 108 109 107 110 108 107 110 103 ...
##  $ Chloride_median                 : num  108 102 106 107 104 105 104 106 105 99 ...
##  $ Chloride_min                    : num  103 100 104 106 100 101 100 101 101 95 ...
##  $ Chloride_range                  : num  0.02091 0.01394 0.00714 0.00748 0.01161 ...
##  $ Creatinine_max                  : num  79.6 61.9 88.4 70.7 61.9 ...
##  $ Creatinine_median               : num  79.6 53 79.6 61.9 48.6 ...
##  $ Creatinine_min                  : num  70.7 44.2 70.7 53 26.5 ...
##  $ Creatinine_range                : num  0.0308 0.0308 0.0316 0.0441 0.0586 ...
##  $ Gender_mean                     : int  1 1 2 2 1 2 2 1 2 1 ...
##  $ Glucose_max                     : num  7.44 6.77 5.66 5.11 7.49 ...
##  $ Glucose_median                  : num  4.5 5 5.11 4.77 5.72 ...
##  $ Glucose_min                     : num  4.22 4.05 4.22 4.66 5.05 ...
##  $ Glucose_range                   : num  0.01122 0.00474 0.00258 0.00111 0.00405 ...
##  $ hands_max                       : int  8 8 4 6 8 8 6 8 6 8 ...
##  $ hands_median                    : num  7.5 6 1 5.5 6.5 7 4 8 1.5 7 ...
##  $ hands_min                       : int  6 6 0 4 3 5 1 5 0 6 ...
##  $ hands_range                     : num  0.00529 0.00359 0.00714 0.00499 0.00849 ...
##  $ Hematocrit_max                  : num  44.6 41.9 49.1 46.3 44 46.8 50.5 45.5 48 42 ...
##  $ Hematocrit_median               : num  43.1 39.6 46.2 43 42.9 ...
##  $ Hematocrit_min                  : num  40.7 37.7 44 41.7 39.5 41.9 44.1 37.1 45 38 ...
##  $ Hematocrit_range                : num  0.01359 0.00732 0.00911 0.01147 0.00746 ...
##  $ Hemoglobin_max                  : num  156 138 161 154 152 157 165 152 156 139 ...
##  $ Hemoglobin_median               : num  146 132 154 145 146 ...
##  $ Hemoglobin_min                  : num  143 128 151 144 138 142 151 122 149 125 ...
##  $ Hemoglobin_range                : num  0.0453 0.0174 0.0179 0.0249 0.0232 ...
##  $ leg_max                         : int  8 8 4 4 2 8 8 1 8 1 ...
##  $ leg_median                      : num  6.5 7.5 3 3.5 2 8 6 0.5 8 0 ...
##  $ leg_min                         : int  4 3 2 2 0 4 4 0 5 0 ...
##  $ leg_range                       : num  0.01058 0.00898 0.00357 0.00499 0.0034 ...
##  $ mouth_max                       : int  5 9 10 12 12 9 10 12 12 12 ...
##  $ mouth_median                    : num  3.5 8 7 12 12 8 6 12 12 12 ...
##  $ mouth_min                       : int  0 4 4 12 12 7 0 10 12 11 ...
##  $ mouth_range                     : num  0.01323 0.00898 0.01071 0 0 ...
##  $ onset_delta_mean                : int  -1023 -341 -1181 -365 -1768 -334 -268 -763 -440 -1324 ...
##  $ onset_site_mean                 : int  1 1 1 2 2 1 2 2 2 2 ...
##  $ Platelets_max                   : int  172 286 233 275 313 220 245 487 149 378 ...
##  $ Platelets_median                : num  169 264 213 233 284 ...
##  $ Platelets_min                   : num  152 230 167 204 268 178 191 212 109 281 ...
##  $ Potassium_max                   : num  4.5 5 4.1 4.3 4.6 4.5 4.5 4.7 4.6 4.4 ...
##  $ Potassium_median                : num  4.25 4.3 4 4.2 3.75 4.3 4.1 4.5 4.3 3.7 ...
##  $ Potassium_min                   : num  4 3.9 3.9 4 3.5 4.2 3.6 4.2 4 3.2 ...
##  $ Potassium_range                 : num  0.001742 0.001916 0.000357 0.000748 0.001824 ...
##  $ pulse_max                       : int  79 90 82 84 101 88 96 100 84 100 ...
##  $ pulse_median                    : num  68 76 73 72 96 66 80 80 68 100 ...
##  $ pulse_min                       : int  61 64 60 68 74 60 66 64 59 80 ...
##  $ pulse_range                     : num  0.0476 0.0467 0.0393 0.0399 0.0448 ...
##  $ respiratory_max                 : int  4 4 4 3 4 4 4 4 4 4 ...
##  $ respiratory_median              : num  3 4 4 3 4 4 3 4 4 4 ...
##  $ respiratory_min                 : int  3 3 4 3 3 3 2 1 4 4 ...
##  $ respiratory_range               : num  0.00265 0.0018 0 0 0.0017 ...
##  $ Sodium_max                      : num  148 142 145 143 143 145 145 145 146 147 ...
##  $ Sodium_median                   : num  146 138 143 139 140 ...
##  $ Sodium_min                      : num  143 136 140 138 138 137 136 139 138 132 ...
##  $ Sodium_range                    : num  0.01742 0.01045 0.00893 0.01247 0.00829 ...
##  $ SubjectID                       : int  533 649 1234 2492 2956 3085 3551 3971 4390 4772 ...
##  $ trunk_max                       : int  8 8 5 5 6 8 7 5 6 3 ...
##  $ trunk_median                    : num  7 7 0 5 4 8 5 3 3 3 ...
##  $ trunk_min                       : int  7 5 0 3 1 7 2 2 2 1 ...
##  $ trunk_range                     : num  0.00265 0.00539 0.00893 0.00499 0.00849 ...
##  $ Urine.Ph_max                    : num  6 7 6 7 6 8 9 6 7 7 ...
##   [list output truncated]
plot(als_train$ALT.SGPT._median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "pink"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$AST.SGOT._median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$Creatinine_median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$Glucose_median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$Hematocrit_median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("bottomleft", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$Platelets_median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$Potassium_median , als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$pulse_median, als_train$ALSFRS_slope, pch = 16, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

plot(als_train$respiratory_median, als_train$ALSFRS_slope, pch = 19, col=ifelse(als_train$Gender_mean==1, "red", "blue"))
legend("topright", pch=c(19,19), col=c("red", "blue"), c("Male", "Female"), bty="o", cex=1.1, box.col="darkgreen")

It is observed from the above preliminary visualizations, that changes in these variables such as SGOT, SGPT, etc cause considerable variance in ALSFRS slope. Hence, we will consider only these 11 variable in our clustering analysis.

als_train = als_train[, c(7, 13, 17, 45, 48, 50, 58, 76, 78, 82, 87  )]
str(als_train)
## 'data.frame':    2223 obs. of  11 variables:
##  $ ALSFRS_slope      : num  -0.966 -0.922 -0.915 -0.598 -0.444 ...
##  $ ALT.SGPT._median  : num  22 13 20 60 26.5 23 46 27 42 20 ...
##  $ AST.SGOT._median  : num  27.5 17 19 40 26.5 26 37 25 30 24 ...
##  $ Creatinine_median : num  79.6 53 79.6 61.9 48.6 ...
##  $ Gender_mean       : int  1 1 2 2 1 2 2 1 2 1 ...
##  $ Glucose_median    : num  4.5 5 5.11 4.77 5.72 ...
##  $ Hematocrit_median : num  43.1 39.6 46.2 43 42.9 ...
##  $ Platelets_median  : num  169 264 213 233 284 ...
##  $ Potassium_max     : num  4.5 5 4.1 4.3 4.6 4.5 4.5 4.7 4.6 4.4 ...
##  $ pulse_max         : int  79 90 82 84 101 88 96 100 84 100 ...
##  $ respiratory_median: num  3 4 4 3 4 4 3 4 4 4 ...

Train a k-Means model on the data, select k

First, scale the data.

als_train_scaled = as.data.frame(lapply(als_train, scale))
str(als_train_scaled)
## 'data.frame':    2223 obs. of  11 variables:
##  $ ALSFRS_slope      : num  -0.381 -0.311 -0.3 0.209 0.457 ...
##  $ ALT.SGPT._median  : num  -0.705 -1.281 -0.833 1.731 -0.416 ...
##  $ AST.SGOT._median  : num  -0.164 -1.259 -1.05 1.139 -0.269 ...
##  $ Creatinine_median : num  0.828 -0.7 0.828 -0.191 -0.955 ...
##  $ Gender_mean       : num  -1.326 -1.326 0.754 0.754 -1.326 ...
##  $ Glucose_median    : num  -0.797 -0.396 -0.306 -0.574 0.184 ...
##  $ Hematocrit_median : num  0.304 0.011 0.555 0.291 0.279 ...
##  $ Platelets_median  : num  -1.326 0.479 -0.49 -0.11 0.85 ...
##  $ Potassium_max     : num  -0.0969 0.2804 -0.3986 -0.2477 -0.0214 ...
##  $ pulse_max         : num  -1.0484 -0.0578 -0.7783 -0.5981 0.9327 ...
##  $ respiratory_median: num  -0.968 0.665 0.665 -0.968 0.665 ...

First, conside k = 4.

library(stats)
set.seed(321)
als_clusters = kmeans(als_train_scaled, 4)

Evaluate the model performance using bar and silhouette plots and summarize the results

als_clusters$size
## [1] 322 469 727 705
require(cluster)
## Loading required package: cluster
dis = dist(als_train_scaled)
sil = silhouette(als_clusters$cluster, dis)
summary(sil)
## Silhouette of 2223 units in 4 clusters from silhouette.default(x = als_clusters$cluster, dist = dis) :
##  Cluster sizes and average silhouette widths:
##           322           469           727           705 
## 0.12820155816 0.02249440513 0.17184749584 0.19290026332 
## Individual silhouette widths:
##        Min.     1st Qu.      Median        Mean     3rd Qu.        Max. 
## -0.11509768  0.06421337  0.14735258  0.14069212  0.21914407  0.38073137

Silouhette plots

plot(sil, border = NA)

Bar Plots

par(mfrow=c(1, 1), mar=c(4, 4, 4, 2))
myColors <- c("darkblue", "red", "green", "brown", "pink", "purple", "yellow", "orange", "black", "grey", "violet")
barplot(t(als_clusters$centers), beside = TRUE, xlab="cluster", 
        ylab="value", col = myColors)
legend("top", ncol=2, legend = c("ALSFRS_Slope", "ALT.SGPT._median", "AST.SGOT._median", "Creatinine_median", "Glucose_median", "Hematocrit_median", "Platelets_median", "Potassium_median", "pulse_median", "respiratory_median"), fill = myColors)

Model Improvement

library(matrixStats)
## Warning: package 'matrixStats' was built under R version 3.4.3
kpp_init = function(dat, K) {
  x = as.matrix(dat)
  n = nrow(x)
  # Randomly choose a first center
  centers = matrix(NA, nrow=K, ncol=ncol(x))
  set.seed(123)
  centers[1,] = as.matrix(x[sample(1:n, 1),])
  for (k in 2:K) {
    # Calculate dist^2 to closest center for each point
    dists = matrix(NA, nrow=n, ncol=k-1)
    for (j in 1:(k-1)) {
      temp = sweep(x, 2, centers[j,], '-')
      dists[,j] = rowSums(temp^2)
    }
    dists = rowMins(dists)
    # Draw next center with probability proportional to dist^2
    cumdists = cumsum(dists)
    prop = runif(1, min=0, max=cumdists[n])
    centers[k,] = as.matrix(x[min(which(cumdists > prop)),])
  }
  return(centers)
}
clust_kpp = kmeans(als_train_scaled, kpp_init(als_train_scaled, 4), iter.max=100, algorithm='Lloyd')
clust_kpp$centers
##      ALSFRS_slope ALT.SGPT._median AST.SGOT._median Creatinine_median
## 1  0.009457390375    -0.1919809585    -0.2330132990    0.415148108699
## 2 -0.118766579920    -0.1053412037    -0.1458557826   -0.105103524451
## 3 -0.009312732085     1.6963808241     1.6327231516   -0.001095948809
## 4  0.021796588621    -0.5112895075    -0.4136034424   -0.549708219017
##      Gender_mean Glucose_median Hematocrit_median Platelets_median
## 1  0.74982221394  0.10933449473     0.39810308814    -0.2215399708
## 2 -0.04146929977 -0.53747210963    -3.21995565628     0.3286348126
## 3  0.56943959706  0.02847891749     0.44424299826    -0.2704489901
## 4 -1.30225518785 -0.02740116268     0.06316811015     0.3519608975
##    Potassium_max      pulse_max respiratory_median
## 1  0.03284406005 -0.07376431826     -0.01153114509
## 2 -0.07953469167 -0.05784627999      0.10745254721
## 3 -0.03577779458  0.04925534086      0.05400532842
## 4 -0.00824139930  0.09405089762     -0.03722160605
sil2 = silhouette(clust_kpp$cluster, dis)
summary(sil2)
## Silhouette of 2223 units in 4 clusters from silhouette.default(x = clust_kpp$cluster, dist = dis) :
##  Cluster sizes and average silhouette widths:
##          990          183          338          712 
## 0.1451373911 0.2811985903 0.1437985383 0.2080572550 
## Individual silhouette widths:
##        Min.     1st Qu.      Median        Mean     3rd Qu.        Max. 
## -0.02363685  0.11655112  0.17906602  0.17628701  0.23928280  0.41810262

Silouhette plot after model improvememnt.

plot(sil2, border = NA)

The above sil plot has better S(i) values, however, average sil width remains same.

Tuning the parameter k

n_rows <- 15
mat = matrix(0,nrow = n_rows)
for (i in 2:n_rows){
  set.seed(321)
  clust_kpp = kmeans(als_train_scaled, kpp_init(als_train_scaled, i), iter.max=100, algorithm='Lloyd')
  sil = silhouette(clust_kpp$cluster, dis)
  mat[i] = mean(as.matrix(sil)[,3])
}
colnames(mat) <- c("Avg_Silhouette_Value")
mat
##       Avg_Silhouette_Value
##  [1,]         0.0000000000
##  [2,]         0.2357663636
##  [3,]         0.1775384456
##  [4,]         0.1762870134
##  [5,]         0.1642306600
##  [6,]         0.1446392470
##  [7,]         0.1501165902
##  [8,]         0.1468196755
##  [9,]         0.1405140565
## [10,]         0.1352907413
## [11,]         0.1307925216
## [12,]         0.1291584508
## [13,]         0.1351560706
## [14,]         0.1283448130
## [15,]         0.1235528266
library(ggplot2)
  ggplot(data.frame(k=2:n_rows,sil=mat[2:n_rows]),aes(x=k,y=sil))+
  geom_line()+
  scale_x_continuous(breaks = 2:n_rows)

Considering the above sil plot, we must consider our model with k=3 as well.

k = 3
set.seed(31)
clust_kpp = kmeans(als_train_scaled, kpp_init(als_train_scaled, k), iter.max=200, algorithm="MacQueen")
sil3 = silhouette(clust_kpp$cluster, dis)
summary(sil3)
## Silhouette of 2223 units in 3 clusters from silhouette.default(x = clust_kpp$cluster, dist = dis) :
##  Cluster sizes and average silhouette widths:
##          743          183         1297 
## 0.2302292899 0.2936640609 0.1309692205 
## Individual silhouette widths:
##        Min.     1st Qu.      Median        Mean     3rd Qu.        Max. 
## -0.05753471  0.11170948  0.17908774  0.17753845  0.24177651  0.42254015
plot(sil3, border = NA)

Comparing these sil plots, we realize that it is better with k=3 than it was with k=4, with greater silouhette average.

Hierarchial Clustering

library(cluster)
pitch_sing = agnes(als_train_scaled, diss=FALSE, method='single')
pitch_comp = agnes(als_train_scaled, diss=FALSE, method='complete')
pitch_ward = agnes(als_train_scaled, diss=FALSE, method='ward')
sil_sing = silhouette(cutree(pitch_sing, k=3), dis)
sil_comp = silhouette(cutree(pitch_comp, k=5), dis)
sil_ward = silhouette(cutree(pitch_ward, k=4), dis)

Dendograms

library(ggdendro)
ggdendrogram(as.dendrogram(pitch_ward), leaf_labels=FALSE, labels=FALSE)

mean(sil_ward[,"sil_width"])
## [1] 0.1673683107
ggdendrogram(as.dendrogram(pitch_ward), leaf_labels=TRUE, labels=T, size=10)

summary(sil_ward)
## Silhouette of 2223 units in 4 clusters from silhouette.default(x = cutree(pitch_ward, k = 4), dist = dis) :
##  Cluster sizes and average silhouette widths:
##          748         1290          182            3 
## 0.2308496789 0.1116674344 0.2920677011 0.7256276452 
## Individual silhouette widths:
##        Min.     1st Qu.      Median        Mean     3rd Qu.        Max. 
## -0.17300143  0.09962072  0.16753389  0.16736831  0.23686987  0.79338869
summary(sil_comp)
## Silhouette of 2223 units in 5 clusters from silhouette.default(x = cutree(pitch_comp, k = 5), dist = dis) :
##  Cluster sizes and average silhouette widths:
##         2184           26            9            3            1 
## 0.4175241082 0.4173778610 0.4061845781 0.7265894469 0.0000000000 
## Individual silhouette widths:
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.3996074  0.3953038  0.4486734  0.4177058  0.4881391  0.7935982
plot(sil_ward, border = NA)

plot(sil_comp, border = NA)

library(mclust)
## Warning: package 'mclust' was built under R version 3.4.3
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
set.seed(1234)
gmm_clust <- Mclust(als_train)
summary(gmm_clust, parameters = TRUE)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm 
## ----------------------------------------------------
## 
## Mclust VEV (ellipsoidal, equal shape) model with 3 components:
## 
##  log.likelihood    n  df          BIC         ICL
##    -58173.55486 2223 213 -117988.6183 -117989.814
## 
## Clustering table:
##    1    2    3 
##  226 1275  722 
## 
## Mixing probabilities:
##            1            2            3 
## 0.1018241000 0.5734008286 0.3247750714 
## 
## Means:
##                              [,1]           [,2]           [,3]
## ALSFRS_slope        -0.8200197169  -0.7233915218  -0.7081302531
## ALT.SGPT._median    34.0180756411  36.6610138175  26.1963970592
## AST.SGOT._median    29.2855010718  30.8136195739  25.9453358135
## Creatinine_median   64.0195481100  70.5752468653  56.0607124353
## Gender_mean          1.6287909441   2.0000000000   1.0000000000
## Glucose_median       6.0075219653   5.4598280770   5.3721272527
## Hematocrit_median    8.6553093852  44.5174123224  40.2111548035
## Platelets_median   251.8749017867 227.2257023774 255.0788761990
## Potassium_max        5.0408541388   4.5798696172   4.5846671539
## pulse_max           90.6906433320  90.0634759858  91.6493071585
## respiratory_median   3.6725058634   3.5914645491   3.5697604768
## 
## Variances:
## [,,1]
##                       ALSFRS_slope ALT.SGPT._median AST.SGOT._median
## ALSFRS_slope         8.81446348733     -28.24232414   -16.8418432032
## ALT.SGPT._median   -28.24232414086    8255.37291299  4301.0002556866
## AST.SGOT._median   -16.84184320320    4301.00025569  3421.1883657596
## Creatinine_median   70.35851689487     206.22510911   365.2892940583
## Gender_mean          2.16258101758      16.62574552     0.9634979045
## Glucose_median      -0.06888216003      56.97903467     9.9591992504
## Hematocrit_median   -8.74610697374    2193.24451046  1291.8107850543
## Platelets_median   -54.78189622066   -5567.82214450 -4028.0135003464
## Potassium_max        0.53610474442     -47.85912967    48.3685668793
## pulse_max           -4.47692229756     175.26341695   204.5793040095
## respiratory_median   4.06177432780     -29.24132195    -1.9115266797
##                    Creatinine_median     Gender_mean   Glucose_median
## ALSFRS_slope             70.35851689    2.1625810176   -0.06888216003
## ALT.SGPT._median        206.22510911   16.6257455225   56.97903466993
## AST.SGOT._median        365.28929406    0.9634979045    9.95919925038
## Creatinine_median      8776.75677163  136.1754955021   98.09823125801
## Gender_mean             136.17549550    2.5745118523    2.96317935000
## Glucose_median           98.09823126    2.9631793500  156.17887078155
## Hematocrit_median      1091.80084023   21.7941051956  891.86868792555
## Platelets_median      -6166.10771507 -205.3216056384 -754.81445506648
## Potassium_max           144.69717674    1.3595727903   59.15896136691
## pulse_max               -46.04724075   -1.4515261409   64.28106396749
## respiratory_median       40.20605100    1.3561763981    2.17583698867
##                    Hematocrit_median Platelets_median  Potassium_max
## ALSFRS_slope            -8.746106974     -54.78189622   0.5361047444
## ALT.SGPT._median      2193.244510459   -5567.82214450 -47.8591296738
## AST.SGOT._median      1291.810785054   -4028.01350035  48.3685668793
## Creatinine_median     1091.800840231   -6166.10771507 144.6971767423
## Gender_mean             21.794105196    -205.32160564   1.3595727903
## Glucose_median         891.868687926    -754.81445507  59.1589613669
## Hematocrit_median     6229.369699490   -5082.06277750 398.0734119160
## Platelets_median     -5082.062777503   97935.59927076  89.9066968249
## Potassium_max          398.073411916      89.90669682  64.3793847232
## pulse_max              518.311213389    -435.67026146   4.5813653115
## respiratory_median      11.293020065     -24.97913353   2.7420579015
##                         pulse_max respiratory_median
## ALSFRS_slope         -4.476922298        4.061774328
## ALT.SGPT._median    175.263416946      -29.241321950
## AST.SGOT._median    204.579304009       -1.911526680
## Creatinine_median   -46.047240747       40.206051004
## Gender_mean          -1.451526141        1.356176398
## Glucose_median       64.281063967        2.175836989
## Hematocrit_median   518.311213389       11.293020065
## Platelets_median   -435.670261455      -24.979133526
## Potassium_max         4.581365311        2.742057902
## pulse_max           302.948544903       -0.193968603
## respiratory_median   -0.193968603        8.768682106
## [,,2]
##                                      ALSFRS_slope
## ALSFRS_slope        0.376472764132785742141606988
## ALT.SGPT._median   -0.195049753309820855795564398
## AST.SGOT._median    0.286927996220803394056275692
## Creatinine_median   0.670915842911480986288097483
## Gender_mean         0.000000000000000005310160414
## Glucose_median     -0.025287237784790771166765211
## Hematocrit_median  -0.010308991321030799154234714
## Platelets_median   -1.617280711064517761954562047
## Potassium_max       0.002372327993399506347593775
## pulse_max          -1.196659541573328544572518695
## respiratory_median  0.062912502190373198462935989
##                                ALT.SGPT._median
## ALSFRS_slope        -0.195049753309820855795564
## ALT.SGPT._median   202.357485555664993626123760
## AST.SGOT._median    96.055006866366753115471511
## Creatinine_median  -28.414437234828870515457311
## Gender_mean          0.000000000000008705617514
## Glucose_median       0.052307030710034875531367
## Hematocrit_median    6.582718204898245772938026
## Platelets_median    -8.412778385798551639140896
## Potassium_max       -0.101923720523368677159937
## pulse_max            9.097337487621983243002433
## respiratory_median   0.097542272362888454706464
##                                AST.SGOT._median
## ALSFRS_slope         0.286927996220803394056276
## ALT.SGPT._median    96.055006866366753115471511
## AST.SGOT._median    74.172736903935714281033142
## Creatinine_median   -8.767553600938230218275748
## Gender_mean          0.000000000000001504849454
## Glucose_median      -0.190870574473019360972614
## Hematocrit_median    2.210007362277997433608334
## Platelets_median   -17.945234692036130752512690
## Potassium_max        0.068284452119214966714367
## pulse_max            0.740776803389732507731935
## respiratory_median   0.209070382447014452287348
##                              Creatinine_median
## ALSFRS_slope         0.67091584291148098628810
## ALT.SGPT._median   -28.41443723482887051545731
## AST.SGOT._median    -8.76755360093823021827575
## Creatinine_median  233.85971614495224457641598
## Gender_mean          0.00000000000000243141532
## Glucose_median      -0.31915666464636965882207
## Hematocrit_median   -2.78567192497759119618195
## Platelets_median   -95.30468321024039823896601
## Potassium_max        0.31561126500699682173590
## pulse_max          -16.67681038229739343137226
## respiratory_median   0.50282267143173320356198
##                                       Gender_mean
## ALSFRS_slope        0.000000000000000005310160414
## ALT.SGPT._median    0.000000000000008705617513923
## AST.SGOT._median    0.000000000000001504849454444
## Creatinine_median   0.000000000000002431415320339
## Gender_mean         0.000365441381557981756749570
## Glucose_median      0.000000000000000035322538160
## Hematocrit_median   0.000000000000000767309356685
## Platelets_median    0.000000000000000128796424816
## Potassium_max      -0.000000000000000023210580728
## pulse_max           0.000000000000000032059726312
## respiratory_median  0.000000000000000019307826839
##                                   Glucose_median
## ALSFRS_slope       -0.02528723778479077116676521
## ALT.SGPT._median    0.05230703071003487553136679
## AST.SGOT._median   -0.19087057447301936097261432
## Creatinine_median  -0.31915666464636965882206709
## Gender_mean         0.00000000000000003532253816
## Glucose_median      0.60763800834882908397105439
## Hematocrit_median   0.04335046886737588950344602
## Platelets_median   -3.51721365349072456751855498
## Potassium_max      -0.00648723166705809321103127
## pulse_max           1.18675900282751034531258938
## respiratory_median  0.01496097148940514140469382
##                               Hematocrit_median
## ALSFRS_slope       -0.0103089913210307991542347
## ALT.SGPT._median    6.5827182048982457729380258
## AST.SGOT._median    2.2100073622779974336083342
## Creatinine_median  -2.7856719249775911961819475
## Gender_mean         0.0000000000000007673093567
## Glucose_median      0.0433504688673758895034460
## Hematocrit_median   6.5576430136922230218488039
## Platelets_median   -1.6868551457926668213360699
## Potassium_max       0.0080328509520460160409083
## pulse_max           4.2013260636575582296359244
## respiratory_median  0.0129731920394417291064748
##                                  Platelets_median
## ALSFRS_slope         -1.6172807110645177619545620
## ALT.SGPT._median     -8.4127783857985516391408964
## AST.SGOT._median    -17.9452346920361307525126904
## Creatinine_median   -95.3046832102403982389660086
## Gender_mean           0.0000000000000001287964248
## Glucose_median       -3.5172136534907245675185550
## Hematocrit_median    -1.6868551457926668213360699
## Platelets_median   2446.4240401498409482883289456
## Potassium_max         1.7542246401960310819845290
## pulse_max            50.4675895256150113254989265
## respiratory_median   -2.3694731052542983285036371
##                                    Potassium_max
## ALSFRS_slope        0.00237232799339950634759377
## ALT.SGPT._median   -0.10192372052336867715993662
## AST.SGOT._median    0.06828445211921496671436671
## Creatinine_median   0.31561126500699682173589622
## Gender_mean        -0.00000000000000002321058073
## Glucose_median     -0.00648723166705809321103127
## Hematocrit_median   0.00803285095204601604090833
## Platelets_median    1.75422464019603108198452901
## Potassium_max       0.11407146124812245213675510
## pulse_max          -0.15434573228099737751684017
## respiratory_median -0.00387037962550750317888837
##                                         pulse_max
## ALSFRS_slope        -1.19665954157332854457251869
## ALT.SGPT._median     9.09733748762198324300243257
## AST.SGOT._median     0.74077680338973250773193513
## Creatinine_median  -16.67681038229739343137225660
## Gender_mean          0.00000000000000003205972631
## Glucose_median       1.18675900282751034531258938
## Hematocrit_median    4.20132606365755822963592436
## Platelets_median    50.46758952561501132549892645
## Potassium_max       -0.15434573228099737751684017
## pulse_max          128.29496776746964314952492714
## respiratory_median  -1.23135434060060711125572652
##                               respiratory_median
## ALSFRS_slope        0.06291250219037319846293599
## ALT.SGPT._median    0.09754227236288845470646436
## AST.SGOT._median    0.20907038244701445228734826
## Creatinine_median   0.50282267143173320356197564
## Gender_mean         0.00000000000000001930782684
## Glucose_median      0.01496097148940514140469382
## Hematocrit_median   0.01297319203944172910647481
## Platelets_median   -2.36947310525429832850363709
## Potassium_max      -0.00387037962550750317888837
## pulse_max          -1.23135434060060711125572652
## respiratory_median  0.35147975339418685569725653
## [,,3]
##                                      ALSFRS_slope
## ALSFRS_slope        0.304081458226215184392771107
## ALT.SGPT._median    0.234412669642543647352539438
## AST.SGOT._median    0.193020337758060711585983427
## Creatinine_median  -0.740197291492217246400286967
## Gender_mean         0.000000000000000001225124671
## Glucose_median      0.014782242817255207209536927
## Hematocrit_median  -0.060443858943519057635995750
## Platelets_median   -3.116797794358534545722250186
## Potassium_max      -0.001829526092394927801881854
## pulse_max          -0.839910176306504663301666369
## respiratory_median  0.048325942064214484628070778
##                                ALT.SGPT._median
## ALSFRS_slope         0.234412669642543647352539
## ALT.SGPT._median   135.474908403300446479988750
## AST.SGOT._median    72.656840023400448558277276
## Creatinine_median  -19.373117660611168133755200
## Gender_mean         -0.000000000000004447763054
## Glucose_median       0.622806710666166329914972
## Hematocrit_median    7.115725667466564097196624
## Platelets_median    -0.813157613059962591783858
## Potassium_max        0.178663032637895374810100
## pulse_max           12.649822796825857196267862
## respiratory_median   0.116232302891555394630529
##                                AST.SGOT._median
## ALSFRS_slope         0.193020337758060711585983
## ALT.SGPT._median    72.656840023400448558277276
## AST.SGOT._median    65.066001558409496396961913
## Creatinine_median    1.915911535218972394645220
## Gender_mean         -0.000000000000004629277048
## Glucose_median      -0.141318901761545379081042
## Hematocrit_median    2.196345620164208423119589
## Platelets_median   -10.640663047084409242870606
## Potassium_max        0.363836054737316449969597
## pulse_max            4.843851142459453029687211
## respiratory_median   0.110633581935144054142484
##                              Creatinine_median
## ALSFRS_slope        -0.74019729149221724640029
## ALT.SGPT._median   -19.37311766061116813375520
## AST.SGOT._median     1.91591153521897239464522
## Creatinine_median  222.60345967844509118549468
## Gender_mean         -0.00000000000000151091172
## Glucose_median       0.79891243949218226916287
## Hematocrit_median   -3.60340234708443452049664
## Platelets_median     2.66250906570993439714812
## Potassium_max        0.35003312792809304632158
## pulse_max          -17.20085119214901325790379
## respiratory_median  -0.24111958543804537513644
##                                       Gender_mean
## ALSFRS_slope        0.000000000000000001225124671
## ALT.SGPT._median   -0.000000000000004447763054306
## AST.SGOT._median   -0.000000000000004629277048158
## Creatinine_median  -0.000000000000001510911720390
## Gender_mean         0.000307497096843337169337629
## Glucose_median      0.000000000000000002863196499
## Hematocrit_median   0.000000000000000783253585155
## Platelets_median   -0.000000000000000007925352988
## Potassium_max      -0.000000000000000036843116365
## pulse_max          -0.000000000000000895221994002
## respiratory_median -0.000000000000000031473067338
##                                    Glucose_median
## ALSFRS_slope        0.014782242817255207209536927
## ALT.SGPT._median    0.622806710666166329914972266
## AST.SGOT._median   -0.141318901761545379081042029
## Creatinine_median   0.798912439492182269162867669
## Gender_mean         0.000000000000000002863196499
## Glucose_median      0.516966007481704958870238897
## Hematocrit_median   0.101845430481395063382343835
## Platelets_median    0.531847043211883696578468061
## Potassium_max       0.003227571694437999984955701
## pulse_max           0.817787168323164115335544011
## respiratory_median  0.001169710935042057257243764
##                               Hematocrit_median
## ALSFRS_slope       -0.0604438589435190576359958
## ALT.SGPT._median    7.1157256674665640971966241
## AST.SGOT._median    2.1963456201642084231195895
## Creatinine_median  -3.6034023470844345204966430
## Gender_mean         0.0000000000000007832535852
## Glucose_median      0.1018454304813950633823438
## Hematocrit_median   5.8219494780370304454208963
## Platelets_median   -2.4276419537058746556112965
## Potassium_max       0.0390729567332353464048644
## pulse_max           3.6913836568552138572840704
## respiratory_median  0.0887206109169108858569786
##                                    Platelets_median
## ALSFRS_slope         -3.116797794358534545722250186
## ALT.SGPT._median     -0.813157613059962591783857988
## AST.SGOT._median    -10.640663047084409242870606249
## Creatinine_median     2.662509065709934397148117569
## Gender_mean          -0.000000000000000007925352988
## Glucose_median        0.531847043211883696578468061
## Hematocrit_median    -2.427641953705874655611296475
## Platelets_median   2062.349535565139376558363437653
## Potassium_max         0.377897998245023181151935887
## pulse_max            35.296073648026606406347127631
## respiratory_median   -0.486888312603901973751874266
##                                    Potassium_max
## ALSFRS_slope       -0.00182952609239492780188185
## ALT.SGPT._median    0.17866303263789537481009972
## AST.SGOT._median    0.36383605473731644996959744
## Creatinine_median   0.35003312792809304632157819
## Gender_mean        -0.00000000000000003684311637
## Glucose_median      0.00322757169443799998495570
## Hematocrit_median   0.03907295673323534640486443
## Platelets_median    0.37789799824502318115193589
## Potassium_max       0.09781068677470353134317804
## pulse_max           0.18589254967795756923187867
## respiratory_median -0.00023819356389362129206022
##                                       pulse_max
## ALSFRS_slope        -0.839910176306504663301666
## ALT.SGPT._median    12.649822796825857196267862
## AST.SGOT._median     4.843851142459453029687211
## Creatinine_median  -17.200851192149013257903789
## Gender_mean         -0.000000000000000895221994
## Glucose_median       0.817787168323164115335544
## Hematocrit_median    3.691383656855213857284070
## Platelets_median    35.296073648026606406347128
## Potassium_max        0.185892549677957569231879
## pulse_max          110.134389147179049928126915
## respiratory_median  -0.494701302870838632852468
##                               respiratory_median
## ALSFRS_slope        0.04832594206421448462807078
## ALT.SGPT._median    0.11623230289155539463052946
## AST.SGOT._median    0.11063358193514405414248358
## Creatinine_median  -0.24111958543804537513643993
## Gender_mean        -0.00000000000000003147306734
## Glucose_median      0.00116971093504205725724376
## Hematocrit_median   0.08872061091691088585697855
## Platelets_median   -0.48688831260390197375187427
## Potassium_max      -0.00023819356389362129206022
## pulse_max          -0.49470130287083863285246821
## respiratory_median  0.30348941115544664226533200
gmm_clust$modelName
## [1] "VEV"

Hence, we have ellipsoidal, equal shaped clusters in our model.

Following is the BIC plot.

plot(gmm_clust$BIC, legendArgs = list(x = "bottom", ncol = 2, cex = 1))

Density Plot

plot(gmm_clust, what = "density")

Classification Plot

plot(gmm_clust, what = "classification")

plot(gmm_clust, what = "uncertainty", dimens = c(2,1), main = "ALSFRS Slope vs. SGPT ")

plot(gmm_clust, what = "uncertainty", dimens = c(4,1), main = "ALSFRS Slope vs. SGPT ")

It can be observed that uncertainity plots do not provide a good estimate of dependence.

gmm_clustDR <- MclustDR(gmm_clust, lambda=1)
summary(gmm_clustDR)
## -----------------------------------------------------------------
## Dimension reduction for model-based clustering and classification 
## -----------------------------------------------------------------
## 
## Mixture model type: Mclust (VEV, 3)
##         
## Clusters    n
##        1  226
##        2 1275
##        3  722
## 
## Estimated basis vectors:
##                               Dir1          Dir2
## ALSFRS_slope       -0.016637210204 -0.0533346714
## ALT.SGPT._median   -0.000575165975  0.0068095316
## AST.SGOT._median    0.001041774565  0.0017412605
## Creatinine_median  -0.000412172210 -0.0005411544
## Gender_mean         0.998910036674  0.9441136589
## Glucose_median     -0.037348371581  0.2876512773
## Hematocrit_median   0.010796483936 -0.0973572505
## Platelets_median    0.000005029877  0.0000336890
## Potassium_max      -0.017886088667  0.1153847314
## pulse_max          -0.000672512832  0.0007270506
## respiratory_median -0.008278072313  0.0148074238
## 
##                  Dir1       Dir2
## Eigenvalues  1.627633   1.458155
## Cum. %      52.746112 100.000000

Boundaries Plot

plot(gmm_clustDR, what = "boundaries", ngrid = 200)

plot(gmm_clustDR, what = "pairs")
plot(gmm_clustDR, what = "scatterplot")

### The above graph shows three different clusters in seperate gaussian planes which signify the diffrent patient phenotypes that can be automatically and reliably identified and used to predict the change of the ALSFRS slope over time.